library(ggplot2)
library(tidyverse)
library(Seurat)
library(miscTools)
library(viridis)
library(venn)
Read in mutation table
mutation_table <- read.csv(file = paste(dir, "Data_input/geneCellMutationCounts_2.27.19.csv", sep = ""), header = T, row.names = 1)
head(mutation_table)
length(intersect(rownames(mutation_table), rownames(tiss_subset_tumor@meta.data)))
[1] 3620
#drop X.1 and X.2 to clean mutation table
mutation_table$X.1 <- NULL
mutation_table$X.2 <- NULL
Normalize the number of mutations by the raw # of transcripts found
# correct colnames
colnames(mutation_table) <- gsub(pattern = "\\.", replacement = "-", x = colnames(mutation_table))
# Subset table to tumor cells alone and
cells.to.use.tumor <- tiss_subset_tumor@meta.data$cell_id
mutation_table <- mutation_table[cells.to.use.tumor, ]
# save the names of the mutated genes
genes.to.use.mutations <- colnames(mutation_table)
length(genes.to.use.mutations)
[1] 14397
# set raw counts equal to the genes in the mutation table and subset to only tumor cells
raw_counts <- as.data.frame(as.matrix(tiss_subset_tumor@raw.data))[genes.to.use.mutations, cells.to.use.tumor]
# transpose the raw counts so that colnames are row ids and rownames are cell ids (same as the mutation table)
raw_counts1 <- as.data.frame(t(raw_counts))
dim(raw_counts1)
[1] 3620 14397
# normalize the mutation table by the raw counts
normalized_mutation_table <- mutation_table/raw_counts1
# Change NANs, NAs or Infs to 0
is.nan.data.frame <- function(x)
do.call(cbind, lapply(x, is.nan))
is.infinite.data.frame <- function(x)
do.call(cbind, lapply(x, is.infinite))
normalized_mutation_table[is.nan(normalized_mutation_table)] <- 0
normalized_mutation_table[is.na(normalized_mutation_table)] <- 0
normalized_mutation_table[is.infinite(normalized_mutation_table)] <- 0
Total Mutations Analysis
# Set Table to Report Only the Total Number of Mutations
total_mutations_table <- as.data.frame(rowSums(normalized_mutation_table))
head(total_mutations_table)
total_mutations_table$cell_name <- rownames(total_mutations_table)
colnames(total_mutations_table) <- c('total_mutations_score','cell_id')
head(total_mutations_table)
Add Metadata to normalized mutation table
total_mutations_table$cell_id <- rownames(total_mutations_table)
total_mutations_table_wmetadata <- left_join(total_mutations_table, tiss_subset_tumor@meta.data, by = 'cell_id')
rownames(total_mutations_table_wmetadata) <- total_mutations_table_wmetadata$cell_id
Total Mutations detected by Group
Add total mutation score to metadata
Plot

---
title: "R Notebook"
output: html_notebook
---

```{r}
library(ggplot2)
library(tidyverse)
library(Seurat)
library(miscTools)
library(viridis)
library(venn)
```

Read in mutation table
```{r}
# rm(list=ls())
dir <- "/myVolume/scell_lung_adenocarcinoma/"
load(file=paste(dir,"Data_input/NI04_tumor_seurat_object.RData", sep=""))
mutation_table <- read.csv(file = paste(dir, "Data_input/geneCellMutationCounts_2.27.19.csv", sep = ""), header = T, row.names = 1)
head(mutation_table)
length(intersect(rownames(mutation_table), rownames(tiss_subset_tumor@meta.data)))
```

```{r}
#drop X.1 and X.2 to clean mutation table
mutation_table$X.1 <- NULL
mutation_table$X.2 <- NULL
```

Normalize the number of mutations by the raw # of transcripts found
```{r}
# correct colnames
colnames(mutation_table) <- gsub(pattern = "\\.", replacement = "-", x = colnames(mutation_table))
# Subset table to tumor cells alone and 
cells.to.use.tumor <- tiss_subset_tumor@meta.data$cell_id
mutation_table <- mutation_table[cells.to.use.tumor, ]
# save the names of the mutated genes
genes.to.use.mutations <- colnames(mutation_table)
length(genes.to.use.mutations)
# set raw counts equal to the genes in the mutation table and subset to only tumor cells
raw_counts <- as.data.frame(as.matrix(tiss_subset_tumor@raw.data))[genes.to.use.mutations, cells.to.use.tumor]
# transpose the raw counts so that colnames are row ids and rownames are cell ids (same as the mutation table)
raw_counts1 <- as.data.frame(t(raw_counts))
dim(raw_counts1)
# normalize the mutation table by the raw counts
normalized_mutation_table <- mutation_table/raw_counts1
# Change NANs, NAs or Infs to 0
is.nan.data.frame <- function(x)
do.call(cbind, lapply(x, is.nan))
is.infinite.data.frame <- function(x)
do.call(cbind, lapply(x, is.infinite))
normalized_mutation_table[is.nan(normalized_mutation_table)] <- 0
normalized_mutation_table[is.na(normalized_mutation_table)] <- 0
normalized_mutation_table[is.infinite(normalized_mutation_table)] <- 0
```

Total Mutations Analysis
```{r}
# Set Table to Report Only the Total Number of Mutations
total_mutations_table <- as.data.frame(rowSums(normalized_mutation_table))
head(total_mutations_table)
total_mutations_table$cell_name <- rownames(total_mutations_table)
colnames(total_mutations_table) <- c('total_mutations_score','cell_id')
head(total_mutations_table)
```

Add Metadata to normalized mutation table
```{r}
total_mutations_table$cell_id <- rownames(total_mutations_table)
total_mutations_table_wmetadata <- left_join(total_mutations_table, tiss_subset_tumor@meta.data, by = 'cell_id')
rownames(total_mutations_table_wmetadata) <- total_mutations_table_wmetadata$cell_id
```

Total Mutations detected by Group
```{r}
pr_mutation_table <- filter(total_mutations_table_wmetadata, analysis == 'grouped_pr')
pd_mutation_table <- filter(total_mutations_table_wmetadata, analysis == 'grouped_pd')
naive_mutation_table <- filter(total_mutations_table_wmetadata, analysis == 'naive')

# Driver Gene
total_mutations_table_driver <- filter(total_mutations_table_wmetadata, driver_gene == 'EGFR' | driver_gene == 'ALK')
```

Add total mutation score to metadata
```{r}
tiss_subset_tumor@meta.data <- left_join(tiss_subset_tumor@meta.data, total_mutations_table_wmetadata[,1:2])
rownames(tiss_subset_tumor@meta.data) <- tiss_subset_tumor@meta.data$cell_id
save(tiss_subset_tumor, file=paste(dir,"Data_input/NI08_tumor_seurat_object_withmut.RData", sep=""))
```

Plot
```{r}
total_mt_score <- pairwise.wilcox.test(x = total_mutations_table_wmetadata$total_mutations_score, g = total_mutations_table_wmetadata$analysis)

# pdf(file = paste(dir, "plot_out/NI08/NI08_total_muts_by_group.pdf", sep = ""))
ggplot(total_mutations_table_wmetadata, aes(x = analysis, y = total_mutations_score, fill = analysis)) + geom_boxplot(outlier.shape = NA) + 
    geom_jitter(alpha = 1/8, aes(colour = analysis)) + guides(colour = FALSE, fill = FALSE) + xlab("Group") + ylab("Mutation Score") + 
    ggtitle("Distribution of Mutation Scores per Cell") + 
    geom_signif(comparisons = list(c("grouped_pd", "grouped_pr")), map_signif_level=TRUE, y_position = 45, annotations = '< 2e-16') + 
    geom_signif(comparisons = list(c("grouped_pd", "naive")), map_signif_level=TRUE, y_position = 50, annotations = '< 2e-16') + 
    geom_signif(comparisons = list(c("grouped_pr", "naive")), map_signif_level=TRUE, y_position = 40, annotations = '0.023') +
    scale_x_discrete(labels = c("naive" = "TN", "grouped_pr" = "PER", "grouped_pd" = "PD"), limits=c("naive","grouped_pr","grouped_pd"))
# dev.off()

# pdf(file = paste(dir, "plot_out/NI08/NI08_total_muts_by_group_ridgeplot.pdf", sep = ""))
ggplot(total_mutations_table_wmetadata, aes(x = total_mutations_score, y = analysis, fill = analysis)) + geom_density_ridges(alpha=.7) + 
    ylab("Group") + xlab("Mutation Score") + scale_y_discrete(labels = c("PD", "PER", "TN")) + 
    theme(legend.position = "none") + theme(axis.title.y = element_blank())
# dev.off()

# pdf(file = paste(dir, "plot_out/NI08/NI08_total_muts_by_group_and_patient.pdf", sep = ""))
ggplot(total_mutations_table_wmetadata, aes(x = analysis, y = total_mutations_score, color = patient_id)) +
    geom_point(position = position_jitterdodge(dodge.width=0.9)) +
    geom_boxplot(outlier.colour = NA, position = position_dodge(width=0.9)) +
    xlab("Group") +
    ylab("Mutation Score") +
    ggtitle("Distribution of Mutation Scores per Cell") +
    guides(colour=guide_legend(title="Response Group")) +
    guides(fill = FALSE, color = FALSE) +
    scale_x_discrete(labels = c("naive" = "TN", "grouped_pr" = "PER", "grouped_pd" = "PD"), limits=c("naive","grouped_pr","grouped_pd"))
# dev.off()

# pdf(file = paste(dir, "plot_out/NI08/NI08_total_muts_by_primvsmet.pdf", sep = ""))
ggplot(data=subset(total_mutations_table_wmetadata, !is.na(primary_or_metastaic)), aes(x = primary_or_metastaic , y = total_mutations_score, color = analysis)) +
      geom_point(position = position_jitterdodge(dodge.width=0.9)) +
      geom_boxplot(outlier.colour = NA, position = position_dodge(width=0.9)) +
      xlab("Group") +
      ylab("Mutation Score") +
      ggtitle("Distribution of Mutation Scores per Cell") +
      guides(colour=guide_legend(title="Response Group")) +
      guides(fill = FALSE)
# dev.off()
```

